# clear
rm(list=ls())

options(warn=0)

# libs
library(haven)
library(data.table)
library(lubridate)
library(tidyverse)

outdate <- gsub("-","_",today())

# read data
X <- data.table(read.csv("01_InData/ltg_forecasts.csv"))
X<-na.omit(X,cols="TICKER")
IBES_CRSP<-data.table(read.csv("01_InData/IBES_CRSP_link_table.csv"))
X<-X[, max_anntims :=max(ANNTIMS),by=c("ANALYS", "TICKER","ANNDATS")]
X<-X[ANNTIMS==max_anntims,]
X<-X[, ANNDATS:=as.Date(as.character(ANNDATS), "%Y%m%d")]


# Merge with recommendations
Recs<- data.table(read.csv("01_InData/analyst_recommendations.csv"))

# Only consider the last recommendation of the day
Recs <- Recs[order(AMASKCD,TICKER, ANNDATS, ANNTIMS)]
Recs<-Recs[, max_anntims :=max(ANNTIMS),by=c("AMASKCD", "TICKER","ANNDATS")]
Recs<-Recs[ANNTIMS==max_anntims,]
Recs<-Recs[, ANNDATS:=as.Date(as.character(ANNDATS), "%Y%m%d")]


# Merge 
X_Recs<-
  merge(
    X,
    Recs,
    by.x=c("ANALYS", "TICKER"),
    by.y=c("AMASKCD", "TICKER"),
    allow.cartesian=TRUE
  )

# Want to keep the forecasts matched with the most recent recommendation of the analyst
setnames(X_Recs, "ANNDATS.y", "ANNDATS")
setnames(X_Recs, "ANNDATS.x", "anndats_revision")

X_Recs<-X_Recs[ANNDATS<anndats_revision,]
X_Recs<-X_Recs[, max_ANNDATS :=max(ANNDATS),by=c("ANALYS", "TICKER", "anndats_revision")]
X_Recs<-X_Recs[max_ANNDATS==ANNDATS,]

# Determine the last forecast per analyst, firm, year
X_Recs<-X_Recs[, anndats_year := format(anndats_revision, "%Y")]

X_Recs<-X_Recs[IRECCD<3, good := 1]
X_Recs<-X_Recs[IRECCD>3, bad := 1]
X_Recs<-X_Recs[IRECCD==3, neutral := 1]

X_Recs<-unique(X_Recs, by=c("ANALYS","TICKER","anndats_revision"))


# Determine the revision size    
X_Recs <- X_Recs[order(ANALYS, TICKER, anndats_revision)]
X_Recs[, lag1_value := shift(VALUE, n = 1, type = 'lag'), by = list(ANALYS, TICKER, FPEDATS)]
X_Recs[, revision := VALUE-lag1_value]

X_Recs[, fcst_value := VALUE]


# Merge with actuals
Actuals <- data.table(read.csv("01_InData/actuals.csv"))
Actuals[, FPEDATS:=PENDS]

Actuals<-Actuals[PDICITY=="ANN",]



Actuals<-unique(Actuals, by=c("TICKER","FPEDATS"))

# Calculate ltg-actuals
Actuals[, actual:=VALUE]
Actuals<-Actuals[order(TICKER, FPEDATS)]



# Adjust for stock splits
Actuals<-
  merge(
    Actuals,
    IBES_CRSP,
    by=c("TICKER")
  )
Actuals<-Actuals[ANNDATS>=sdate & ANNDATS<=edate,]
Actuals[, ANNDATS := as.Date(as.character(ANNDATS), "%Y%m%d")]
Actuals[, FPEDATS := as.Date(as.character(FPEDATS),"%Y%m%d")]
Actuals[, year := format(FPEDATS, "%Y")]
Actuals<-na.omit(Actuals, cols="PERMNO")

CRSP_daily <- data.table(read.csv("01_InData/crsp.csv"))
CRSP_daily[, date := as.Date(as.character(date), "%Y%m%d")]

Actuals<-
  merge(
    Actuals,
    CRSP_daily,
    by.x=c("PERMNO","ANNDATS"),
    by.y=c("PERMNO", "date"),
    all.x=TRUE
  )
Actuals[, actual_adj:=actual/CFACSHR]

Actuals<-Actuals[order(TICKER, FPEDATS)]


for (v in (3:5)){
  cmdstr1<-paste0("Actuals[, lead",v,"_actual := shift(actual_adj, n=",v,",type='lead'),by=list(TICKER)]")
  eval(parse(text=cmdstr1))
  cmdstr1<-paste0("Actuals[, ltg_",v," := (((lead",v,"_actual-actual_adj)/abs(actual_adj)+1)^(1/",v,")-1)*100]")
  eval(parse(text=cmdstr1))
}

Actuals<-unique(Actuals, by=c("TICKER","year"))

# Merge the forecast revisions with actuals
Forecasts_act<-
  merge(
    X_Recs,
    Actuals,
    by.x = c("TICKER", "anndats_year"),
    by.y = c("TICKER", "year")
  )


# Determine the y variable
for (v in (3:5)){
  cmdstr1<-paste0("Forecasts_act<-Forecasts_act[, forecast_error", v,"_:= ltg_", v,"-VALUE.x]")
  eval(parse(text=cmdstr1))
}

Forecasts_act <- Forecasts_act %>%
  select(-matches("(.x|.y)$"))

write_dta(Forecasts_act, path="02_OutData/forecasts_act_ltg_ind_eavars.dta")


